plus sn10x

load dependancies

Baf53cre ENS neurons, SI data
Nb infection 5d, PBS(control) x4 INF(inflammation) x4

loading 8k cells for each,
demo called 20,985 cells
plus called 21,294 cells

clustering and annotation
ref-mapping to Cell2020 SS2-Colon (and Droplet-Ileum)

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32288 21294
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGCGATGAC-1 AAACCCAAGGCCCGTT-1 AAACCCAAGGTTAAAC-1
## Xkr4                     2                  .                  2
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCACAACTTCTT-1 AAACCCACAAGACGAC-1 AAACCCACATGACTAC-1
## Xkr4                     1                  .                  3
## Gm1992                   1                  .                  2
## Gm19938                  .                  .                  2
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]     8 21294
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##             AAACCCAAGCGATGAC-1 AAACCCAAGGCCCGTT-1 AAACCCAAGGTTAAAC-1
## Baf53Nb.IF1                 86                 32                 19
## Baf53Nb.IF2                 75                 17                  4
## Baf53Nb.IF3                  8                  3                  7
## Baf53Nb.IF4                 19                  9                 42
## Baf53Nb.CL1                 72                 28                 15
## Baf53Nb.CL2                180                 81                 58
## Baf53Nb.CL3                 44                 14                246
## Baf53Nb.CL4                158                 65                 29
##             AAACCCACAACTTCTT-1 AAACCCACAAGACGAC-1 AAACCCACATGACTAC-1
## Baf53Nb.IF1                119                 44                123
## Baf53Nb.IF2                 23                  7                 16
## Baf53Nb.IF3                 54                  7                 14
## Baf53Nb.IF4                 27                 64                 28
## Baf53Nb.CL1                123                 31                579
## Baf53Nb.CL2                259                112                285
## Baf53Nb.CL3                 49                 32                 51
## Baf53Nb.CL4                216                 71                187
#rownames(FB) <- as.vector(sapply(rownames(FB),function(x){gsub("_",".",x)}))
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##             AAACCCAAGCGATGAC-1 AAACCCAAGGCCCGTT-1 AAACCCAAGGTTAAAC-1
## Baf53Nb.IF1                 86                 32                 19
## Baf53Nb.IF2                 75                 17                  4
## Baf53Nb.IF3                  8                  3                  7
## Baf53Nb.IF4                 19                  9                 42
## Baf53Nb.CL1                 72                 28                 15
## Baf53Nb.CL2                180                 81                 58
## Baf53Nb.CL3                 44                 14                246
## Baf53Nb.CL4                158                 65                 29
##             AAACCCACAACTTCTT-1 AAACCCACAAGACGAC-1 AAACCCACATGACTAC-1
## Baf53Nb.IF1                119                 44                123
## Baf53Nb.IF2                 23                  7                 16
## Baf53Nb.IF3                 54                  7                 14
## Baf53Nb.IF4                 27                 64                 28
## Baf53Nb.CL1                123                 31                579
## Baf53Nb.CL2                259                112                285
## Baf53Nb.CL3                 49                 32                 51
## Baf53Nb.CL4                216                 71                187
rowSums(FB)
## Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 Baf53Nb.CL1 Baf53Nb.CL2 
##     2311800      363799      206416      610890     2079472     5541002 
## Baf53Nb.CL3 Baf53Nb.CL4 
##     1101972     3880752
rowSums(FB>0)
## Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 Baf53Nb.CL1 Baf53Nb.CL2 
##       21268       20998       20850       21258       21269       21283 
## Baf53Nb.CL3 Baf53Nb.CL4 
##       21261       21275

FB

# shorten rownames    
#rownames(FB) <- paste0("hashtag",1:8)

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Baf53Nb_Ileum")

FB.seur
## An object of class Seurat 
## 8 features across 21294 samples within 1 assay 
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "Baf53Nb.IF1" "Baf53Nb.IF2" "Baf53Nb.IF3" "Baf53Nb.IF4" "Baf53Nb.CL1"
## [6] "Baf53Nb.CL2" "Baf53Nb.CL3" "Baf53Nb.CL4"
rownames(FB.seur@assays$RNA@counts)
## [1] "Baf53Nb.IF1" "Baf53Nb.IF2" "Baf53Nb.IF3" "Baf53Nb.IF4" "Baf53Nb.CL1"
## [6] "Baf53Nb.CL2" "Baf53Nb.CL3" "Baf53Nb.CL4"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

scales::show_col(ggsci::pal_igv("default")(49))

scales::show_col(ggsci::pal_d3("category20b")(20))

scales::show_col(ggsci::pal_d3("category20c")(20))

#color.FB <- ggsci::pal_d3("category20b")(20)[c(1,6,
#                                               3,8,13
#                                               )]

#color.FB <- scales::hue_pal()(5)  
#color.FB <- ggsci::pal_igv("default")(49)[c(1,4,5,6,7)]
color.FB <- ggsci::pal_d3("category20c")(20)[c(2,7,12,17,
                                               1,6,11,16
                                               )]

level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
par(mfrow=c(3,3))

plot(sort(rowSums(t(FB))),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])

#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("Baf",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("Baf",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

test q0.97-99

demultiplexing

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.97)
## Cutoff for Baf53Nb.IF1 : 58 reads
## Cutoff for Baf53Nb.IF2 : 17 reads
## Cutoff for Baf53Nb.IF3 : 12 reads
## Cutoff for Baf53Nb.IF4 : 31 reads
## Cutoff for Baf53Nb.CL1 : 77 reads
## Cutoff for Baf53Nb.CL2 : 203 reads
## Cutoff for Baf53Nb.CL3 : 35 reads
## Cutoff for Baf53Nb.CL4 : 137 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    11066     1558     8670
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##     Doublet    Negative Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 
##       11066        1558        1059        1002         565         880 
## Baf53Nb.CL1 Baf53Nb.CL2 Baf53Nb.CL3 Baf53Nb.CL4 
##        1216        1395        1382        1171
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.98)
## Cutoff for Baf53Nb.IF1 : 61 reads
## Cutoff for Baf53Nb.IF2 : 18 reads
## Cutoff for Baf53Nb.IF3 : 13 reads
## Cutoff for Baf53Nb.IF4 : 34 reads
## Cutoff for Baf53Nb.CL1 : 82 reads
## Cutoff for Baf53Nb.CL2 : 219 reads
## Cutoff for Baf53Nb.CL3 : 37 reads
## Cutoff for Baf53Nb.CL4 : 146 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10464     1828     9002
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##     Doublet    Negative Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 
##       10464        1828        1149        1025         554         891 
## Baf53Nb.CL1 Baf53Nb.CL2 Baf53Nb.CL3 Baf53Nb.CL4 
##        1270        1454        1427        1232
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for Baf53Nb.IF1 : 67 reads
## Cutoff for Baf53Nb.IF2 : 20 reads
## Cutoff for Baf53Nb.IF3 : 15 reads
## Cutoff for Baf53Nb.IF4 : 37 reads
## Cutoff for Baf53Nb.CL1 : 92 reads
## Cutoff for Baf53Nb.CL2 : 244 reads
## Cutoff for Baf53Nb.CL3 : 41 reads
## Cutoff for Baf53Nb.CL4 : 162 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9496     2256     9542
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##     Doublet    Negative Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 
##        9496        2256        1292        1085         544         966 
## Baf53Nb.CL1 Baf53Nb.CL2 Baf53Nb.CL3 Baf53Nb.CL4 
##        1345        1518        1498        1294
# tag3 q0.97
# tags q0.99
cutoff.FB <- c(67,20,12,37,
               92,244,41,168)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 Baf53Nb.CL1 Baf53Nb.CL2 
##          67          20          12          37          92         244 
## Baf53Nb.CL3 Baf53Nb.CL4 
##          41         168
data.frame(cutoff.FB)
##             cutoff.FB
## Baf53Nb.IF1        67
## Baf53Nb.IF2        20
## Baf53Nb.IF3        12
## Baf53Nb.IF4        37
## Baf53Nb.CL1        92
## Baf53Nb.CL2       244
## Baf53Nb.CL3        41
## Baf53Nb.CL4       168
cutoff.list <- as.list(cutoff.FB)
color.list <- as.list(color.FB)
names(color.list) <- rownames(FB.seur)
par(mfrow=c(3,4))

plot(sort(rowSums(t(FB))),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main="sum")

plot.new()
plot.new()
plot.new()

plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.list$Baf53Nb.IF1, col = color.list$Baf53Nb.IF1)
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.list$Baf53Nb.IF2, col = color.list$Baf53Nb.IF2)
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.list$Baf53Nb.IF3, col = color.list$Baf53Nb.IF3)
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.list$Baf53Nb.IF4, col = color.list$Baf53Nb.IF4)
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.list$Baf53Nb.CL1, col = color.list$Baf53Nb.CL1)
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.list$Baf53Nb.CL2, col = color.list$Baf53Nb.CL2)
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.list$Baf53Nb.CL3, col = color.list$Baf53Nb.CL3)
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.list$Baf53Nb.CL4, col = color.list$Baf53Nb.CL4)

custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}
df.FB <- custom.Demux(FB.seur,cutoff.FB)
dim(df.FB)
## [1] 21294     2
df.FB[1:10,]
##                    HTO_classification.global     hash.ID
## AAACCCAAGCGATGAC-1                   Doublet     Doublet
## AAACCCAAGGCCCGTT-1                  Negative    Negative
## AAACCCAAGGTTAAAC-1                   Doublet     Doublet
## AAACCCACAACTTCTT-1                   Doublet     Doublet
## AAACCCACAAGACGAC-1                   Singlet Baf53Nb.IF4
## AAACCCACATGACTAC-1                   Doublet     Doublet
## AAACCCAGTAAGCGGT-1                   Doublet     Doublet
## AAACCCAGTACAGGTG-1                   Singlet Baf53Nb.CL3
## AAACCCAGTGGGCTCT-1                   Singlet Baf53Nb.CL4
## AAACCCAGTGTCCGTG-1                   Doublet     Doublet
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3
##                  Doublet     9672        0           0           0           0
##                  Negative       0     2152           0           0           0
##                  Singlet        0        0        1303        1059         650
##                          hash.ID
## HTO_classification.global Baf53Nb.IF4 Baf53Nb.CL1 Baf53Nb.CL2 Baf53Nb.CL3
##                  Doublet            0           0           0           0
##                  Negative           0           0           0           0
##                  Singlet          924        1317        1492        1481
##                          hash.ID
## HTO_classification.global Baf53Nb.CL4
##                  Doublet            0
##                  Negative           0
##                  Singlet         1244
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                       orig.ident nCount_RNA nFeature_RNA nCount_HTO
## AAACCCAAGCGATGAC-1 Baf53Nb_Ileum        642            8        642
## AAACCCAAGGCCCGTT-1 Baf53Nb_Ileum        249            8        249
## AAACCCAAGGTTAAAC-1 Baf53Nb_Ileum        420            8        420
## AAACCCACAACTTCTT-1 Baf53Nb_Ileum        870            8        870
##                    nFeature_HTO   HTO_maxID HTO_secondID HTO_margin
## AAACCCAAGCGATGAC-1            8 Baf53Nb.IF2  Baf53Nb.IF1  1.1203044
## AAACCCAAGGCCCGTT-1            8 Baf53Nb.IF2  Baf53Nb.CL4  0.4521962
## AAACCCAAGGTTAAAC-1            8 Baf53Nb.CL3  Baf53Nb.IF4  1.0136111
## AAACCCACAACTTCTT-1            8 Baf53Nb.IF3  Baf53Nb.CL1  0.9739863
##                         HTO_classification HTO_classification.global  hash.ID
## AAACCCAAGCGATGAC-1 Baf53Nb.IF1_Baf53Nb.IF2                   Doublet  Doublet
## AAACCCAAGGCCCGTT-1                Negative                  Negative Negative
## AAACCCAAGGTTAAAC-1 Baf53Nb.CL3_Baf53Nb.IF4                   Doublet  Doublet
## AAACCCACAACTTCTT-1 Baf53Nb.CL1_Baf53Nb.IF3                   Doublet  Doublet
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,12500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0704.seur.subset.rds")
FB.seur.subset <- readRDS("FB0704.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID.sort', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,12500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.025)

table(FB.seur@meta.data$hash.ID.sort)
## 
##     Doublet    Negative Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 
##        9672        2152        1303        1059         650         924 
## Baf53Nb.CL1 Baf53Nb.CL2 Baf53Nb.CL3 Baf53Nb.CL4 
##        1317        1492        1481        1244
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,11000),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+625,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "Baf53Nb_Ileum")
GEX.seur
## An object of class Seurat 
## 23258 features across 21294 samples within 1 assay 
## Active assay: RNA (23258 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##     Doublet    Negative Baf53Nb.IF1 Baf53Nb.IF2 Baf53Nb.IF3 Baf53Nb.IF4 
##        9672        2152        1303        1059         650         924 
## Baf53Nb.CL1 Baf53Nb.CL2 Baf53Nb.CL3 Baf53Nb.CL4 
##        1317        1492        1481        1244
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

subset singlets

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat 
## 23258 features across 9470 samples within 1 assay 
## Active assay: RNA (23258 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 5 & nFeature_RNA > 500 & nFeature_RNA < 3800 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat 
## 23258 features across 9423 samples within 1 assay 
## Active assay: RNA (23258 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,11000),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+625,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,11000),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+625,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: Uhrf1, E2f8, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Cdk1, Cdc25c,
## Nek2, not searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

nearly no cycling !

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 100)
##   [1] "Gal"           "Mgat4c"        "Gm32647"       "Grm5"         
##   [5] "Nmu"           "Zfp804a"       "Gm29521"       "Col24a1"      
##   [9] "Cntn5"         "Klhl1"         "Adgrg6"        "4930432L08Rik"
##  [13] "Cemip"         "Brinp3"        "Gm30613"       "Ntng1"        
##  [17] "Cpne4"         "Robo2"         "Kcnb2"         "Tmeff2"       
##  [21] "Cntnap2"       "Nrxn3"         "Myh11"         "Ano2"         
##  [25] "Lingo2"        "Gpr149"        "Zfp804b"       "Cdh8"         
##  [29] "Ebf1"          "Gm21847"       "Dgkg"          "Wdr17"        
##  [33] "Cdh18"         "Gm49953"       "Cdh9"          "Ntrk3"        
##  [37] "Pcdh11x"       "Unc5d"         "Pdzrn4"        "Gm15261"      
##  [41] "Sema5a"        "Cmah"          "Sst"           "Sgcz"         
##  [45] "Nxph2"         "Schip1"        "Dgki"          "Robo1"        
##  [49] "Astn2"         "Vwc2l"         "Vip"           "Ano1"         
##  [53] "1700111E14Rik" "Itgb6"         "Kcnip4"        "Dcc"          
##  [57] "Car10"         "Prkg1"         "Gm38405"       "Calcb"        
##  [61] "Pcdh9"         "Nrg1"          "Cdh6"          "Rbpms"        
##  [65] "Clstn2"        "4930509J09Rik" "Grin3a"        "Bmpr1b"       
##  [69] "March1"        "Rasgef1b"      "4930422I22Rik" "AC150683.1"   
##  [73] "Cdh19"         "Piezo2"        "Myl1"          "Pde1a"        
##  [77] "Rab27b"        "Gm15584"       "Gm30094"       "Efr3a"        
##  [81] "Ccbe1"         "Hbb-y"         "Plxna4"        "Dpp10"        
##  [85] "Gpc5"          "Pbx3"          "Gm49938"       "Lpp"          
##  [89] "Fut9"          "Postn"         "Gm15680"       "Nkain3"       
##  [93] "Pcdh10"        "Gm20754"       "8030451A03Rik" "Col25a1"      
##  [97] "Prkcq"         "Asic2"         "Egfem1"        "Gm30382"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist|Rik$|^AC|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


GEX.seur <- RunPCA(GEX.seur, features = setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) ))
## PC_ 1 
## Positive:  Ntrk3, Tmeff2, Robo2, Cdh8, Nrxn3, Ano2, Cpne4, Clstn2, Myl1, Mgat4c 
##     Dgkg, Gpr149, Pdzrn4, Plxna4, Grin3a, Spock3, Adgrg6, Pcdh10, Cdh6, Cntn5 
##     Itgb6, Zfp804a, Sgcz, Cysltr2, Cntnap2, Cacna2d3, Kif26b, Astn2, Iqgap2, Cux2 
## Negative:  Lrrtm4, Galntl6, Tox, Ndst4, Grik1, Cacnb2, Pde3a, Kcnd2, Bnc2, Lrp1b 
##     Pcdh7, Epha5, Synpo2, Tshz2, Syt6, Cdc14a, Plcxd3, Kcnab1, Chrna7, Specc1 
##     Zfp536, Galnt17, Pitpnc1, Nos1, Lrrc4c, Lrrc7, Brinp2, Fbxw15, Fat3, Tenm3 
## PC_ 2 
## Positive:  Rbfox1, Ptprt, Bnc2, Tafa1, Tshz2, Grik1, Gpc6, Frmd4b, Plcxd3, St6galnac3 
##     Caln1, Tox, Tcf7l2, Brinp2, Cdc14a, Fbxw15, Dock2, Fbxw24, Tmem132c, Oprk1 
##     Pcdh7, Pld5, Nfia, Sdk2, Specc1, Adamtsl1, Slc35f4, Tafa5, Slc26a4, Tmem163 
## Negative:  Nos1, Auts2, Egfem1, Etv1, Cadps2, Kcnq5, Schip1, Asic2, Gfra1, Cmah 
##     Dach1, Dgkb, Epha5, Kcnt2, Kcnab1, Rgs6, Stxbp6, Alcam, Creb5, Lrrc4c 
##     Il1rapl1, Hs6st3, Adgrl3, Cntnap5a, Ebf1, Tmem108, Cdh11, Gria3, Grid1, Pde4d 
## PC_ 3 
## Positive:  Cdh18, Klhl1, Kcnip4, Pbx3, Meis1, Kctd8, Gpc6, Htr4, C79798, Csmd3 
##     Sema5a, Gabrg3, Cntn3, Dlc1, Vwc2l, Serpini1, Zbbx, Skap1, Stxbp5l, Zfhx3 
##     Plcl1, Csmd1, March1, Cdh9, Galnt18, Cadm2, Pde4d, Khdrbs2, Unc5c, Pakap 
## Negative:  Adgrg6, Sgcd, Slc4a4, Cysltr2, Itgb6, Nos1, Nmu, Gpr149, Grin3a, Dapk2 
##     Rgs6, Nfia, Ano2, Zfp804a, Dgkg, Cbln2, Lhfpl2, Kcnab1, Ccbe1, Gfra1 
##     Zfp536, Cntnap5a, Otof, Cpne4, Efr3a, Syt15, Scn11a, Ngfr, Smad6, Pex7 
## PC_ 4 
## Positive:  Dock10, Kcnip4, Lingo2, Csmd3, Sorcs1, Gda, Ndst4, Cntn5, Tac1, Nxph1 
##     Thsd4, Cntn3, Kctd8, Nyap2, Sgcz, Lrp1b, Robo1, Kcnq5, Pld5, Lrrc7 
##     Tenm2, Lrrtm4, Abca5, Lin7a, Plcb1, Sorcs3, Epha5, Grm7, Cpne8, Kctd16 
## Negative:  Klhl1, Sema5a, Vwc2l, March1, Rasgef1b, Cdh9, Galnt18, Prune2, Il1rapl1, Alk 
##     Pbx3, Kcnh7, Mgat4c, Bcl2, Dpp10, Zfhx3, Zbbx, Chsy3, Grm5, C79798 
##     Galnt17, Pcdh7, Lncbate1, Vcan, Auts2, Ghr, Dcc, Scgn, Kcnb2, Olfr78 
## PC_ 5 
## Positive:  Ntng1, Ebf1, Trps1, Trhde, Cntn4, Gna14, Csmd1, Zmat4, Tmtc2, Nkain3 
##     Col18a1, Sctr, Tenm4, Myo1e, Nxph2, Cpa6, Nrg1, Kcnd2, Nav2, Ccdc85a 
##     Myo16, Sez6l, Neurod6, Fstl4, Nrp1, Fbn2, Npas3, Glp1r, Gal, Flrt2 
## Negative:  Kcnt2, Prkg1, Dgkb, Alcam, Car10, Rasgef1b, Vwc2l, Mgat4c, Epha5, Vcan 
##     Galntl6, Gucy1a2, Cdh20, Gabrg3, Dmd, Dpp10, Thsd7b, Cdh9, Lingo2, Rgs6 
##     Sema5a, Khdrbs2, Htr4, Kctd8, Nmur2, Antxr2, Lrrc4c, Ptprz1, Galnt13, Adcy2
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ))
## [1] 1078
head(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ),300)
##   [1] "Gal"        "Mgat4c"     "Grm5"       "Nmu"        "Zfp804a"   
##   [6] "Col24a1"    "Cntn5"      "Klhl1"      "Adgrg6"     "Cemip"     
##  [11] "Brinp3"     "Ntng1"      "Cpne4"      "Robo2"      "Kcnb2"     
##  [16] "Tmeff2"     "Cntnap2"    "Nrxn3"      "Myh11"      "Ano2"      
##  [21] "Lingo2"     "Gpr149"     "Zfp804b"    "Cdh8"       "Ebf1"      
##  [26] "Dgkg"       "Wdr17"      "Cdh18"      "Cdh9"       "Ntrk3"     
##  [31] "Pcdh11x"    "Unc5d"      "Pdzrn4"     "Sema5a"     "Cmah"      
##  [36] "Sst"        "Sgcz"       "Nxph2"      "Schip1"     "Dgki"      
##  [41] "Robo1"      "Astn2"      "Vwc2l"      "Vip"        "Ano1"      
##  [46] "Itgb6"      "Kcnip4"     "Dcc"        "Car10"      "Prkg1"     
##  [51] "Calcb"      "Pcdh9"      "Nrg1"       "Cdh6"       "Rbpms"     
##  [56] "Clstn2"     "Grin3a"     "Bmpr1b"     "March1"     "Rasgef1b"  
##  [61] "Cdh19"      "Piezo2"     "Myl1"       "Pde1a"      "Rab27b"    
##  [66] "Efr3a"      "Ccbe1"      "Plxna4"     "Dpp10"      "Gpc5"      
##  [71] "Pbx3"       "Lpp"        "Fut9"       "Postn"      "Nkain3"    
##  [76] "Pcdh10"     "Col25a1"    "Prkcq"      "Asic2"      "Egfem1"    
##  [81] "Actg2"      "Trhde"      "Tbx20"      "Cysltr2"    "Skap1"     
##  [86] "Serpini1"   "Tac1"       "Kcnh7"      "Meis2"      "Zfhx3"     
##  [91] "Tafa2"      "Abi3bp"     "Chrna9"     "Dkk2"       "Csmd3"     
##  [96] "Acp7"       "Spock3"     "Cntn4"      "Exoc3l2"    "Penk"      
## [101] "Luzp2"      "Kctd16"     "Galnt13"    "Pde4d"      "Fgf14"     
## [106] "Kcnq5"      "Myo3b"      "Abca9"      "Hpse2"      "Csmd1"     
## [111] "Ghr"        "Cartpt"     "Tnr"        "Nos1"       "Mid1"      
## [116] "Cacna2d3"   "Plcl1"      "Olfr889"    "Gpm6a"      "Dgkb"      
## [121] "Iqgap2"     "Grp"        "Spag16"     "Trps1"      "Flrt2"     
## [126] "Il1rapl1"   "Sox2ot"     "Capn9"      "Gna14"      "Arhgap6"   
## [131] "Nell1"      "Parm1"      "Rerg"       "Cpa6"       "Hs6st3"    
## [136] "Chsy3"      "Slco1a1"    "Chrm3"      "Kctd8"      "Fbxl7"     
## [141] "Myh3"       "Adamts9"    "Cadm2"      "Nxph1"      "Myocd"     
## [146] "Slc4a4"     "Synpr"      "Lama2"      "Cdh10"      "Kif26b"    
## [151] "Galnt18"    "Kcnd2"      "Itga1"      "Adamts6"    "Cacnb4"    
## [156] "Grm7"       "Mir99ahg"   "Aff2"       "Grik1"      "Galntl6"   
## [161] "Opcml"      "Cadps2"     "Foxp2"      "Cntn3"      "Il18r1"    
## [166] "Hs3st4"     "Mecom"      "Esr1"       "Prune2"     "Gabrg3"    
## [171] "Rasgrf1"    "Shisa9"     "Etl4"       "Col8a1"     "Epha7"     
## [176] "Dach1"      "Ano5"       "Zbbx"       "Fn1"        "Prr16"     
## [181] "Car9"       "L3mbtl4"    "Sox6"       "Antxr2"     "Thsd7b"    
## [186] "Arhgap15"   "Morc1"      "Igfbp5"     "Cux2"       "Ntrk2"     
## [191] "Scnn1a"     "Met"        "Terb2"      "Trpm5"      "Dock10"    
## [196] "Stab1"      "Galr1"      "Kirrel3"    "Npas3"      "Sctr"      
## [201] "Olfr78"     "Cbln2"      "Vcan"       "Rarb"       "Grm8"      
## [206] "Fstl4"      "Etv1"       "Mrc1"       "Scn7a"      "Ntm"       
## [211] "C79798"     "Sema3a"     "Tenm4"      "Bche"       "Glis3"     
## [216] "Fam129a"    "Egfr"       "Calca"      "Gabrb1"     "Adam2"     
## [221] "Plcb1"      "Tenm2"      "Slc35f1"    "Ttc29"      "Otof"      
## [226] "Pde7b"      "Nr4a3"      "D5Ertd615e" "Rmst"       "Hcn1"      
## [231] "Alcam"      "Terf1"      "Pak7"       "Gng2"       "Calcrl"    
## [236] "Sorcs3"     "Kcnmb2"     "Lrrc4c"     "Lhfpl2"     "Nell1os"   
## [241] "Lockd"      "Dapk2"      "Rxfp2"      "Necab1"     "Runx1"     
## [246] "Cntnap5b"   "Gfra1"      "Ror1"       "Cald1"      "Col11a1"   
## [251] "Htr4"       "Cfap299"    "Saysd1"     "Stxbp6"     "Plxdc2"    
## [256] "Nwd2"       "Cfh"        "Pdgfra"     "Pde4c"      "Bpifc"     
## [261] "Thsd4"      "Ror2"       "Myo1h"      "Pappa"      "Nfatc1"    
## [266] "Creb5"      "Tnc"        "Moxd1"      "Sorcs1"     "Rhoj"      
## [271] "Loxl1"      "Slc44a5"    "Tenm1"      "Plac8"      "Kit"       
## [276] "Mast4"      "Rgs6"       "Slc7a15"    "Loxl2"      "Rxfp1"     
## [281] "Nox4"       "Ptprc"      "Adgrl2"     "Tll1"       "Itgbl1"    
## [286] "Dach2"      "Pde1c"      "Fendrr"     "Trpc3"      "Eya1"      
## [291] "Ccdc85a"    "Ifi203"     "Tex21"      "Npnt"       "Ngfr"      
## [296] "Ptprt"      "Rtl4"       "Slc24a3"    "Best2"      "Dlc1"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:24
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param =20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9423
## Number of edges: 389694
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8391
## Number of communities: 24
## Elapsed time: 1 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity=100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 135)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:45:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:45:27 Read 9423 rows and found 24 numeric columns
## 15:45:27 Using Annoy for neighbor search, n_neighbors = 20
## 15:45:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:45:29 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpGEZD2t\filed04841fb7f60
## 15:45:29 Searching Annoy index using 1 thread, search_k = 2000
## 15:45:31 Annoy recall = 100%
## 15:45:31 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 15:45:32 Initializing from normalized Laplacian + noise (using irlba)
## 15:45:32 Commencing optimization for 500 epochs, with 278036 positive edges
## 15:45:56 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

#saveRDS(GEX.seur, "GEX.seur.afterUMAP.rds")
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)

check some markers

markers.neur <- list(PEMN=c("Chat","Tac1","Drd2"),
                     PIMN=c("Nos1","Vip","Adm","Lgr5"),
                     PIN=c("Penk","Prlr","Mtnr1a"),
                     PSN=c("Calca","Calcb","Cck","Bdnf",
                           "Piezo2","Nog","Nmu","Sst","Il4ra",
                           "Il13ra1","Il7"),
                     PSVN=c("Glp2r","Fst","Csf2rb","Csf2rb2"),
                     Glia=c("Plp1","Gfap","Rxrg"),  # add three more
                     mosue=c("Fos","Actl6a","Actl6b","Phox2b","Sox10","Mki67","Top2a")  # Baf53
                     )
unlist(markers.neur)
##     PEMN1     PEMN2     PEMN3     PIMN1     PIMN2     PIMN3     PIMN4      PIN1 
##    "Chat"    "Tac1"    "Drd2"    "Nos1"     "Vip"     "Adm"    "Lgr5"    "Penk" 
##      PIN2      PIN3      PSN1      PSN2      PSN3      PSN4      PSN5      PSN6 
##    "Prlr"  "Mtnr1a"   "Calca"   "Calcb"     "Cck"    "Bdnf"  "Piezo2"     "Nog" 
##      PSN7      PSN8      PSN9     PSN10     PSN11     PSVN1     PSVN2     PSVN3 
##     "Nmu"     "Sst"   "Il4ra" "Il13ra1"     "Il7"   "Glp2r"     "Fst"  "Csf2rb" 
##     PSVN4     Glia1     Glia2     Glia3    mosue1    mosue2    mosue3    mosue4 
## "Csf2rb2"    "Plp1"    "Gfap"    "Rxrg"     "Fos"  "Actl6a"  "Actl6b"  "Phox2b" 
##    mosue5    mosue6    mosue7 
##   "Sox10"   "Mki67"   "Top2a"
DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

#DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "sort_clusters",
#        cols = c("midnightblue","darkorange1")) +
#  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "seurat_clusters") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

refmapping

public SS2

ss2_neur.seur <- readRDS("I:/Shared_win/projects/ENS_data/SCP1038_ENS_scRNAseq_Cell2020/final_RAISIN/ss2_neur.seur_traj.rds")
ss2_neur.seur
## An object of class Seurat 
## 20036 features across 2657 samples within 1 assay 
## Active assay: RNA (20036 features, 1467 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
(DimPlot(ss2_neur.seur, reduction = "tsne", label = T, group.by = "inlocal") + NoLegend()) + 
  (DimPlot(ss2_neur.seur, reduction = "umap", label = T, group.by = "inlocal")+ NoLegend())

mapping

ss2_neur.seur <- RunUMAP(ss2_neur.seur, umap.method = "uwot-learn",dims = 1:15)
## Warning in RunUMAP.default(object = data.use, reduction.model =
## reduction.model, : 'uwot-learn' is deprecated. Set umap.method = 'uwot' and
## return.model = TRUE
## UMAP will return its model
## 15:46:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:46:08 Read 2657 rows and found 15 numeric columns
## 15:46:08 Using Annoy for neighbor search, n_neighbors = 30
## 15:46:08 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:46:09 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpGEZD2t\filed0481a1646b2
## 15:46:09 Searching Annoy index using 1 thread, search_k = 3000
## 15:46:09 Annoy recall = 100%
## 15:46:10 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:46:10 Initializing from normalized Laplacian + noise (using irlba)
## 15:46:10 Commencing optimization for 500 epochs, with 101888 positive edges
## 15:46:17 Optimization finished
anchors.ss2 <- FindTransferAnchors(
  reference = ss2_neur.seur,
  reference.assay = 'RNA',
  query = GEX.seur,
  query.assay = 'RNA',
  reference.reduction = 'pca',
  #features = Feat.mm,
  normalization.method = 'LogNormalize',
  reduction = 'pcaproject',
  dims = 1:30
)
## Projecting cell embeddings
## Finding neighborhoods
## Finding anchors
##  Found 510 anchors
## Filtering anchors
##  Retained 371 anchors
local_neur.pred <- MapQuery(
  anchorset = anchors.ss2,
  query = GEX.seur,
  reference = ss2_neur.seur,
  refdata = list(
    inlocal = 'inlocal'
  ),
  reference.reduction = 'pca',
  reduction.model = 'umap'
)
## Warning: `invoke()` is deprecated as of rlang 0.4.0.
## Please use `exec()` or `inject()` instead.
## This warning is displayed once per session.
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 
## Integrating dataset 2 with reference dataset
## Finding integration vectors
## Integrating data
## Computing nearest neighbors
## Running UMAP projection
## 15:46:38 Read 9423 rows
## 15:46:38 Processing block 1 of 1
## 15:46:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:46:38 Initializing by weighted average of neighbor coordinates using 1 thread
## 15:46:38 Commencing optimization for 167 epochs, with 282690 positive edges
## 15:46:47 Finished
local_neur.pred
## An object of class Seurat 
## 23283 features across 9423 samples within 2 assays 
## Active assay: RNA (23258 features, 1500 variable features)
##  1 other assay present: prediction.score.inlocal
##  5 dimensional reductions calculated: pca, tsne, umap, ref.pca, ref.umap
raw_p1 <- DimPlot(local_neur.pred,
                  reduction = 'umap',
                  group.by = 'seurat_clusters',
                  label=TRUE,
                  label.size = 3,
                  repel = F) + NoLegend()+ labs(title="local clusters")
raw_p2 <- DimPlot(local_neur.pred,
                  reduction = 'umap',
                  group.by = 'predicted.inlocal',
                  label=TRUE,
                  label.size = 3,
                  repel = T) + NoLegend()
raw_p3 <- DimPlot(local_neur.pred,
                  reduction = 'ref.umap',
                  group.by = 'seurat_clusters',
                  label=TRUE,
                  label.size = 3,
                  repel = TRUE) + NoLegend()+ labs(title="") 
raw_p4 <- DimPlot(local_neur.pred,
                  reduction = 'ref.umap',
                  group.by = 'predicted.inlocal',
                  label=TRUE,
                  label.size = 3,
                  repel = TRUE) + NoLegend()

check

cowplot::plot_grid(raw_p1,raw_p2,ncol = 1)

FeaturePlot(GEX.seur, features = c("Chat","Nos1","Penk","Ptprt",
                                   "Calcb","Nmu","Cdh9","Sst"), ncol = 4)

FeaturePlot(GEX.seur, features = c("Calcb","Nmu","Il13ra1","Il4ra",
                                   "Aff2","Gpr149","Ano2","Ntrk3"), ncol = 4)

FeaturePlot(local_neur.pred, features = "predicted.inlocal.score")

table(prediction=local_neur.pred$predicted.inlocal,
      clusters=local_neur.pred$seurat_clusters)
##           clusters
## prediction    0    1    2    3    4    5    6    7    8    9   10   11   12
##     PEMN_1    0    1    0  743   97    0    0    0  471    2    1  288    2
##     PEMN_4 1093    1  791    1  612    0    0    0   12    1    1    0    0
##     PEMN_5    0    0    0    0    9    0    0    0    2    0    0    0    0
##     PIMN_3    0    6    0    0    0    8   44    1    0    1    0    0    0
##     PIMN_6    0  633    0    0    0   47    0    0    0    0    0    0    0
##     PIMN_7    0  260    0    0    0  509  523    0    0    0    0    1    1
##     PIN_1     0    0    0    0    0    0    0    0    0    0    0    0    1
##     PIN_3     0    0    0    0    0    0    0    0    0    0    0    0    0
##     PIN_4     0    0    0    0    0    0    0    1    0    0    0   14    0
##     PSN_1     0    0    0    0    0    0    0  440    0    0  326    0    0
##     PSN_2     0    0    1    0    0    0    0    8    0  443   11    0    0
##     PSN_3     5    0    7    0    0    0    0   41    0    0   28    0    0
##     PSVN_1    0    2    0    0    1    0    0    2    0    0    1    0    0
##     PSVN_2    0   24    0    0    0    9    0    0    0    0    0    0  278
##           clusters
## prediction   13   14   15   16   17   18   19   20   21   22   23
##     PEMN_1    2    4    2    3   51    1   17    1   11    0    1
##     PEMN_4    1    1    0    0   71    0   35    2   30    0    0
##     PEMN_5    0    0    1    0    6    0    0    0    0    0    0
##     PIMN_3    2  159    0    0    0    0    2    1    2    0    0
##     PIMN_6    0    0    1    0   21    0   14    0    7    0    0
##     PIMN_7    0   55    0    0   20    1    7    0   10    0    0
##     PIN_1   260    9    0    0    0    0    2  101    0    0    0
##     PIN_3     0    0    0    1    0    0    0    0    0    0    0
##     PIN_4     0    0    0  188    0    0    0    0    0    0    0
##     PSN_1     0    0    0    0    0    0    0    0    0    0    0
##     PSN_2     2    0  220    0    0    0    6    1    2    0    0
##     PSN_3     0    0    0    0    1    0    9    0    0   46    0
##     PSVN_1    0    3    0    0    0    5   13    0    3    1   39
##     PSVN_2    1    0    0    0    1  150    8    1    3    0    0
table(FB.info=local_neur.pred$FB.info,
                 clusters=local_neur.pred$seurat_clusters)
##              clusters
## FB.info         0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
##   Doublet       0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Negative      0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Baf53Nb.IF1 105 155 165  99  81  88  75   9  54  51 118  45  42  40  19  26
##   Baf53Nb.IF2  85  97  96  86  85  68  59   8  57  65 102  29  32  30  24  22
##   Baf53Nb.IF3  62  74  54  46  55  40  46   8  34  33  37  20  18  20  16  21
##   Baf53Nb.IF4 102  89  59  74  68  56  63   7  43  39  99  26  27  23  27  28
##   Baf53Nb.CL1 164 120  93  99  96  75  86 109  74  54   2  50  43  42  36  35
##   Baf53Nb.CL2 204 145 108 112 116  90  83 136  77  74   2  49  46  42  37  37
##   Baf53Nb.CL3 206 141 136 104 102  85  88 115  83  77   5  48  46  37  34  29
##   Baf53Nb.CL4 170 106  88 124 116  71  67 101  63  54   3  36  28  34  38  26
##              clusters
## FB.info        16  17  18  19  20  21  22  23
##   Doublet       0   0   0   0   0   0   0   0
##   Negative      0   0   0   0   0   0   0   0
##   Baf53Nb.IF1  24  30  19  14  18   6   5   4
##   Baf53Nb.IF2  17  13  28  14  13   6   9   7
##   Baf53Nb.IF3   9  12  10  11   9   2   3   4
##   Baf53Nb.IF4  27  10  13  12   5  11   6   5
##   Baf53Nb.CL1  30  24  15  22  19  15   7   5
##   Baf53Nb.CL2  33  20  29  13  18   8   2   3
##   Baf53Nb.CL3  31  33  19  19  17   8   6   8
##   Baf53Nb.CL4  21  29  24   8   8  12   9   4
pheatmap::pheatmap(table(prediction=local_neur.pred$predicted.inlocal,
      clusters=local_neur.pred$seurat_clusters),
                   main = "Cell Count",
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f")

pheatmap::pheatmap(t(100*rowRatio(t(table(prediction=local_neur.pred$predicted.inlocal,
                                clusters=local_neur.pred$seurat_clusters)))),
                   main = "Cell Ratio",
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f")

cowplot::plot_grid(
pheatmap::pheatmap(table(prediction=local_neur.pred$predicted.inlocal,
      clusters=local_neur.pred$seurat_clusters),
                   main = "Cell Count",
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", silent = T, legend = F)$gtable,

pheatmap::pheatmap(t(100*rowRatio(t(table(prediction=local_neur.pred$predicted.inlocal,
                                clusters=local_neur.pred$seurat_clusters)))),
                   main = "Cell Ratio",
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f",silent=T, legend = F)$gtable,
ncol = 1)

pheatmap::pheatmap(table(FB.info=local_neur.pred$FB.info,
      clusters=local_neur.pred$seurat_clusters),
                   main = "Cell Count",
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f")

pheatmap::pheatmap(t(100*rowRatio(t(table(FB.info=local_neur.pred$FB.info,
                                clusters=local_neur.pred$seurat_clusters)))),
                   main = "Cell Ratio",
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f")

VlnPlot(local_neur.pred, group.by = "seurat_clusters", features = "predicted.inlocal.score", y.max = 1.1) + NoLegend()

data.frame(cluster=local_neur.pred$seurat_clusters,
           score=local_neur.pred$predicted.inlocal.score) %>%
  group_by(cluster) %>%
  summarise(mean_prediction=round(mean(score),2) ) %>% 
  #t() %>% 
  #as.table() %>%
  ggplot(mapping = aes(x=cluster,y=mean_prediction))+  geom_col() + theme_bw()

resort results

# this sorting step is done after refmapping, then replot those results using resorted cluters           
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(2,0,4,8,3,11, # PEMNs
                                            1,5,6,14,  # PIMNs
                                            16,13,20, # PINs
                                            7,10,15,9,22, # PSNs
                                            23,18,12,  # PSVN
                                            17,19,  # MIX 
                                            21        # Glia
                                            ))
local_neur.pred$sort_clusters <- GEX.seur$sort_clusters
pheatmap::pheatmap(table(prediction=local_neur.pred$predicted.inlocal,
      clusters=local_neur.pred$sort_clusters),
                   main = "Cell Count",
      gaps_row = c(3,6,9,12),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f")

pheatmap::pheatmap(t(100*rowRatio(t(table(prediction=local_neur.pred$predicted.inlocal,
                                clusters=local_neur.pred$sort_clusters)))),
                   main = "Cell Ratio",
      gaps_row = c(3,6,9,12),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f")

cowplot::plot_grid(
pheatmap::pheatmap(table(prediction=local_neur.pred$predicted.inlocal,
      clusters=local_neur.pred$sort_clusters),
                   main = "Cell Count",
      gaps_row = c(3,6,9,12),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f",silent = T, legend = F)$gtable,

pheatmap::pheatmap(t(100*rowRatio(t(table(prediction=local_neur.pred$predicted.inlocal,
                                clusters=local_neur.pred$sort_clusters)))),
                   main = "Cell Ratio",
      gaps_row = c(3,6,9,12),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f",silent = T, legend = F)$gtable,
ncol = 1)

VlnPlot(local_neur.pred, group.by = "sort_clusters", features = "predicted.inlocal.score", y.max = 1.1) + NoLegend()

data.frame(cluster=local_neur.pred$sort_clusters,
           score=local_neur.pred$predicted.inlocal.score) %>%
  group_by(cluster) %>%
  summarise(mean_prediction=round(mean(score),2) ) %>% 
  #t() %>% 
  #as.table() %>%
  ggplot(mapping = aes(x=cluster,y=mean_prediction))+  geom_col() + theme_bw()

DotPlot(GEX.seur, features = c(as.vector(unlist(markers.neur)),
                               
                               "Acta2","Myh11",
                               "Kit","Sctr","Ano1",
                               "Rgs9","Lrig1","Rspo3","Ntsr1",
                               "Trpc4","Kcnmb2",
                               #"P2ry14","Col8a1",
                               "Pdgfra","Kcnn3","Pecam1",
                               "Krt19","Lrrn4","Upk1b",#"Epcam",
                               "Ptprc","Adgre1","Lyz2"), 
        group.by = "sort_clusters") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$sort_clusters)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$sort_clusters)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

preAnno

GEX.seur@meta.data$preAnno1 <- as.character(GEX.seur@meta.data$seurat_clusters)

GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(2)] <- "PEMN.1"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(0)] <- "PEMN.2"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(4)] <- "PEMN.3"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(8)] <- "PEMN.4"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(3)] <- "PEMN.5"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(11)] <- "PEMN.6"

GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(1)] <- "PIMN.1"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(5)] <- "PIMN.2"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(6)] <- "PIMN.3"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(14)] <- "PIMN.4"

GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(16)] <- "PIN.1"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(13)] <- "PIN.2"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(20)] <- "PIN.3"

GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(7)] <- "PSN.1"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(10)] <- "PSN.2"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(15)] <- "PSN.3"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(9)] <- "PSN.4"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(22)] <- "PSN.5"

GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(23)] <- "PSVN.1"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(18)] <- "PSVN.2"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(12)] <- "PSVN.3"

GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(17)] <- "MIX.1"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(19)] <- "MIX.2"
GEX.seur@meta.data$preAnno1[GEX.seur@meta.data$preAnno1 %in% c(21)] <- "Glia"

GEX.seur@meta.data$preAnno1 <- factor(GEX.seur@meta.data$preAnno1,
                                      levels = c(paste0("PEMN.",1:6),
                                                 paste0("PIMN.",1:4),
                                                 paste0("PIN.",1:3),
                                                 paste0("PSN.",1:5),
                                                 paste0("PSVN.",1:3),
                                                 "MIX.1","MIX.2","Glia"))


GEX.seur@meta.data$preAnno2 <- gsub(".1|.2|.3|.4|.5|.6","",as.character(GEX.seur@meta.data$preAnno1))
GEX.seur@meta.data$preAnno2 <- factor(GEX.seur@meta.data$preAnno2,
                                      levels = c("PEMN","PIMN","PIN","PSN","PSVN","MIX","Glia"))
head(GEX.seur@meta.data$preAnno2)
## [1] PEMN PEMN PSN  PEMN PEMN PIN 
## Levels: PEMN PIMN PIN PSN PSVN MIX Glia
gsub(".1|.2|.3|.4|.5|.6","",head(GEX.seur@meta.data$preAnno2))
## [1] "PEMN" "PEMN" "PSN"  "PEMN" "PEMN" "PIN"
DimPlot(GEX.seur, reduction = "umap", group.by = "seurat_clusters", label = T) + 
  DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, repel = F)

DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 2.5) + 
  DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, repel = F)

cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 2.5) + coord_fixed(ratio = 0.95) + NoLegend(),
  DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, repel = F) + coord_fixed(ratio = 0.95) + NoLegend(),
ncol = 1)

cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 2.5) + coord_fixed(ratio = 0.95),
  DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, repel = F) + coord_fixed(ratio = 0.95),
ncol = 1)

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$preAnno1)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$preAnno1)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

GEX.seur@meta.data$cnt <- factor(gsub("1$|2$|3$|4$","",as.character(GEX.seur@meta.data$FB.info)))
#GEX.seur@meta.data$cnt <- as.character(GEX.seur@meta.data$FB.info)
cowplot::plot_grid(
pheatmap::pheatmap(table(cnt=GEX.seur$cnt,
      clusters=GEX.seur$preAnno1),
                   main = "Cell Count",
                   gaps_row = c(1),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(cnt=GEX.seur$cnt,
                                clusters=GEX.seur$preAnno1)),
                   main = "Cell Ratio",
                   gaps_row = c(1),
      gaps_col = c(6,10,13,18,21,23),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 2.8) + coord_fixed(ratio = 0.95),
  DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt", #split.by = "cnt", 
        ncol = 1, cols = color.FB[c(5,1)]) + coord_fixed(ratio = 0.95),
ncol = 1)

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt", split.by = "cnt", 
        ncol = 4, cols = color.FB[c(5,1)])

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt", #split.by = "cnt", 
        ncol = 1, cols = color.FB[c(5,1)])

pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$preAnno1)[c(3:10),],
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(4,7,9,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F)

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$preAnno1)[c(3:10),]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(4,7,9,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F)

VlnPlot(subset(GEX.seur,subset=preAnno1 %in% c("PSN.1","PSN.2")), features = c("Calcb","Nmu","Il13ra1","Il4"), group.by = "FB.info", ncol = 2)

VlnPlot(subset(GEX.seur,subset=preAnno1 %in% c("PSN.1","PSN.2")), features = c("Calcb","Nmu","Il13ra1","Il4"), group.by = "cnt", ncol = 2, cols = c("#00BFC4","#F8766D"))

pp.list <- VlnPlot(subset(GEX.seur,subset=preAnno1 %in% c("PSN.1","PSN.2")), features = c("Calcb","Nmu","Il13ra1","Il4ra",
                                                                                          "Aff2","Gpr149","Ano2","Ntrk3"), group.by = "cnt", ncol = 2, cols = c("#00BFC4","#F8766D"),
                   pt.size = 0.05,
                   combine = F)

pp.list <- lapply(pp.list,function(p){
  p + ylim(c(0,6)) +
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black") + ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("Baf53Nb.CL","Baf53Nb.IF")),
                               label.y = c(5.3),
                               size=3.5) + NoLegend()
})
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
cowplot::plot_grid(
  plotlist = pp.list,
  ncol = 4
)

 VlnPlot(subset(GEX.seur,subset=preAnno1 %in% c("PSN.1","PSN.2")), features = c("Nmu"), group.by = "cnt", ncol = 2, cols = c("#00BFC4","#F8766D"),
                   combine = T) + ylim(c(0,6)) + ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("Baf5Nb.IF","Baf5Nb.CL")),
                               label.y = c(5),
                               size=2.5
                               ) + NoLegend()

VlnPlot(GEX.seur, features = c("Calcb","Nmu","Il13ra1","Il4"), group.by = "FB.info", ncol = 2)

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$preAnno2)[c(3:10),],
                   main = "Cell Count",
                   #gaps_row = c(2),
      gaps_col = c(5),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$preAnno2)[c(3:10),]),
                   main = "Cell Ratio",
                   #gaps_row = c(2),
      gaps_col = c(5),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent = T)$gtable,
ncol = 1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 2, pt.size = 0.01, group.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 2, pt.size = 0.01, group.by = "preAnno1")

markers.new2 <- c(PEMN=c("Chat","Gfra2","Casz1","Xylt1",
                         "Ptprt",
                         #""
                         "Bnc2","Tox",
                         "Oprk1",
                         "Kalrn","Pi15","Drd2",
                         "Adamtsl1", "Gabrb1",
                         "Fbxw15","Fbxw24","Cdc14a","Chrna7",
                         "Caln1","Tmem132c","Piezo1",
                         "Satb1","Cntnap5b",
                         "Nxph1","Lama2", "Erbb4",
                         "Tac1","Lrrc7","Ryr3","Eda",
                         "Chgb","Pgm5","Shc4","Vgll3",
                         "Ptn","Man1a"),
                 PIMN=c("Nos1","Gfra1","Etv1",
                        "Creb5","Airn","Enpp1","Unc13c",
                        "Plpp3",
                        "Fat1","Lgr5","Gabrb2",
                        "Fndc1",
                        "Adcy2","S100a4","Npy",
                        "Cmah",
                        "Vip","Pde1a","Ebf1","Gpc5",
                        "Col25a1","Cartpt"),
                 PIN=c("Penk","Ntrk2","Kctd8","Pde7b",
                       "Fut9","Nfatc1","Egfr","Mgll",
                       "Ntn1","Prlr",
                       "Chrm3","Skap1","L3mbtl4","Fam107b",
                       "Tafa2","Nxph2","Gm32647","Gm29521",
                       #"Grp",
                       "Ntng1","Kcnh7",
                       "Bmpr1b","Rerg","Npy1r",
                       #"Slc8a1","Slc8a2","Slc29a1","Slc29a2","Slc29a3","Slc29a4",
                       "Piezo2","Abca9","Abca6","Abca8b"),
                 PSN=c("Calcb","Calb2","Nmu","Kcnj12",
                       "Nog","Bmp4","Adgrg6","Cysltr2",
                       "Pcdh10","Ngfr","Galr1","Met",
                       "Htr3a",
                       "Il7",
                       "Aff2","Gpr149",
                       "Efr3a","Cdh6","Cdh8","Pdzrn4",
                       "Clstn2","Cachd1","Ano2","Ntrk3","Cpne4",
                       "Vwc2l","Cdh9","Scgn",
                       "Vcan","Cck","Sst","Adamts9",
                       "Kcnn2","Pantr1","Vwc2","Vipr2"),
                 PSVN=c("Moxd1","Tacr1",
                        "Sctr","Npas3","Synpr",
                        "Kcnk13","St18","Gal"))


pm.CL_new <- DotPlot(subset(GEX.seur,subset=cnt %in% c("Baf53Nb.CL")), features = as.vector(unlist(markers.new2)), group.by = "preAnno1",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new

pm.All_new <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new2)), group.by = "preAnno1",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "preAnno1") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "preAnno1")

(DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "preAnno2") + coord_fixed(1.3)) +
 ( DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "preAnno2")+ coord_fixed(1.3))

DoubletFinder

library(DoubletFinder)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR

## NULL
## [1] "Creating 3141 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 3141 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."

#saveRDS(GEX.seur,"./GEX.seur.preAnno0717.rds")